package dataClean;

import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONObject;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

/**
 * @author legolas
 * @date 2020/3/14 下午4:36
 */

/**
 * Mapper of the data-cleaning job: parses one JSON record per input line and emits the
 * cleaned fields of every valid record.
 *
 * <p>Input: k1 = offset of the line start, v1 = the raw line (expected to be one JSON object).
 * <br>Output: k2 = the record's {@code uid} (anchor id), v2 = the tab-separated values of
 * {@code gold}, {@code watchnumpv}, {@code follower} and {@code length}.
 *
 * <p>Lines that cannot be parsed, or whose fields fail validation, are skipped and counted
 * under the {@value #COUNTER_GROUP} counter group instead of failing the task.
 */
public class DataCleanMap extends Mapper<LongWritable, Text, Text, Text> {

    /** Counter group under which skipped records are reported. */
    private static final String COUNTER_GROUP = "DataClean";

    /** Counter name for lines that are blank, not valid JSON, or carry non-numeric fields. */
    private static final String MALFORMED_COUNTER = "MALFORMED_RECORDS";

    /** Counter name for well-formed lines rejected by the validity filter. */
    private static final String INVALID_COUNTER = "INVALID_RECORDS";

    // Reused across map() calls: context.write() is handed the current contents on each
    // call, so there is no need to allocate two new Text objects for every record.
    private final Text outKey = new Text();
    private final Text outValue = new Text();

    /**
     * Cleans a single input line.
     *
     * @param k1      offset of the line within the input (unused)
     * @param v1      the raw line, expected to hold one JSON object
     * @param context task context used to emit the cleaned record and to update counters
     * @throws IOException          if writing the output record fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void map(LongWritable k1, Text v1, Context context) throws IOException, InterruptedException {
        String line = v1.toString();

        String id;
        int gold;
        int watchnumpv;
        int follower;
        int length;
        try {
            JSONObject jsonObj = JSON.parseObject(line);
            if (jsonObj == null) {
                // Nothing to extract (e.g. a blank line): treat it as a malformed record.
                context.getCounter(COUNTER_GROUP, MALFORMED_COUNTER).increment(1);
                return;
            }
            id = jsonObj.getString("uid");
            gold = jsonObj.getIntValue("gold");
            watchnumpv = jsonObj.getIntValue("watchnumpv");
            follower = jsonObj.getIntValue("follower");
            length = jsonObj.getIntValue("length");
        } catch (RuntimeException malformed) {
            // fastjson reports unparsable JSON and non-numeric field values through unchecked
            // exceptions. A cleaning job must drop such lines, not abort the whole task.
            context.getCounter(COUNTER_GROUP, MALFORMED_COUNTER).increment(1);
            return;
        }

        // Filter out abnormal records. A missing uid (null) is rejected together with the
        // literal string "null"; previously a missing uid caused a NullPointerException.
        // NOTE(review): watchnumpv is not range-checked, matching the original filter - confirm
        // whether negative values should be rejected as well.
        boolean hasId = id != null && !"null".equals(id);
        if (!hasId || gold < 0 || follower < 0 || length < 0) {
            context.getCounter(COUNTER_GROUP, INVALID_COUNTER).increment(1);
            return;
        }

        outKey.set(id);
        outValue.set(gold + "\t" + watchnumpv + "\t" + follower + "\t" + length);
        context.write(outKey, outValue);
    }
}
